Objectives

The goal of this step in the workflow is to visually examine peatland water table elevation (WTE) data at 7 distinct sites in the Marcell Experimental Forest. To do so, we use 2 types of WTE (aka bogwell) data:

  1. Manual stripcharts and
  2. Shaft encoders.

Install and load necessary R packages

Set date range of interest

min_year = 2017
max_year = 2021

Peatland Daily Water Table Elevation (Bogwell Data)

First, we need to visualize the stripchart data for peatland daily water table elevation (aka bogwell data) at each of the 7 Marcell Experimental Forest sites:

  1. S1
  2. S2
  3. S3
  4. S4
  5. S5
  6. S6
  7. Bog Lake Fen (BOGLK)

Water table elevation (WTE) is typically measured in either meters or feet above sea level. For this project, we typically used meters above sea level.

More information about peatland and upland water elevation data collected at the Marcell Experimental Forest can be found on the Environmental Data Initiative Repository Portal.

Manual Stripcharts

  • Read in the most up-to-date data from the following Box file path: External-MEF_DATA/Hydro/WaterTableElevation/Bogwells/L1

    • Less up-to-date, already published data can be found at the following Box file path: External-MEF_DATA/EDI/peatland_water_table_daily/edi.562.2/data_objects/
  • Clean the data into a tidy format

    • Subset data to 2017 - present
#set file path to read in data from Box 
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/Hydro/WaterTableElevation/Bogwells/L1"

#read in the raw data 
bogwell <- read_csv(here(filepath, "L1_Peatland_well_daily_history_2021forMia.csv"))

#clean the data
##note: we do not want any E values for the flag column 
bogwell_clean <- bogwell %>% 
  clean_names() %>% 
  mutate(year = format(as.POSIXct(date, format = '%Y-%m-%d', 
                                  tz = "GMT"), 
                       format = '%Y'),
         year = as.numeric(year)) %>% 
  subset(year >= min_year & year <= max_year)
  • Plot all sites together to overview the 2017 - 2021 stripchart streamflow data
#plot
p_bogwell <- ggplot(data = bogwell_clean) + 
  geom_line(aes(x = date, y = wte, color = peatland)) + 
  theme_classic() + 
  labs(x = "Time", 
       y = "Water Table Elevation (m above sea level)",
       title = paste0("Daily Peatland Water Table Elevation (", min_year, "-", max_year, ")"),
       color = "Site"
       ) + 
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5, 
                                     size = 7))

#static plot 
p_bogwell

S1 Manual Stripchart

bogwell_clean_s1 <- bogwell_clean %>% 
    subset(peatland == "S1")

#plot
p_bogwell_s1 <- ggplot(data = bogwell_clean_s1) + 
  geom_line(aes(x = date, 
                y = wte, 
                group = 1,
                text = paste0("Date: ", date, "\n",
                              "Water Table Elevation (m): ", wte))) + 
  theme_classic() + 
  labs(x = "Time", 
       y = "Water Table Elevation (m above sea level)",
       title = paste0("S1 Daily Peatland Water Table Elevation (", min_year, "-", max_year, ")") 
       ) + 
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5, 
                                     size = 7))

#interactive plot 
ggplotly(p_bogwell_s1,
         tooltip = "text")

S2 Manual Stripchart

bogwell_clean_s2 <- bogwell_clean %>% 
    subset(peatland == "S2")

#plot
p_bogwell_s2 <- ggplot(data = bogwell_clean_s2) + 
  geom_line(aes(x = date, 
                y = wte, 
                group = 1,
                text = paste0("Date: ", date, "\n",
                              "Water Table Elevation (m): ", wte))) + 
  theme_classic() + 
  labs(x = "Time", 
       y = "Water Table Elevation (m above sea level)",
       title = paste0("S2 Daily Peatland Water Table Elevation (", min_year, "-", max_year, ")") 
       ) + 
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5, 
                                     size = 7))

#interactive plot 
ggplotly(p_bogwell_s2,
         tooltip = "text")

S3 Manual Stripchart

bogwell_clean_s3 <- bogwell_clean %>% 
    subset(peatland == "S3")

#plot
p_bogwell_s3 <- ggplot(data = bogwell_clean_s3) + 
  geom_line(aes(x = date, 
                y = wte, 
                group = 1,
                text = paste0("Date: ", date, "\n",
                              "Water Table Elevation (m): ", wte))) + 
  theme_classic() + 
  labs(x = "Time", 
       y = "Water Table Elevation (m above sea level)",
       title = paste0("S3 Daily Peatland Water Table Elevation (", min_year, "-", max_year, ")") 
       ) + 
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5, 
                                     size = 7))

#interactive plot 
ggplotly(p_bogwell_s3,
         tooltip = "text")

S4 Manual Stripchart

bogwell_clean_s4 <- bogwell_clean %>% 
    subset(peatland == "S4")

#plot
p_bogwell_s4 <- ggplot(data = bogwell_clean_s4) + 
  geom_line(aes(x = date, 
                y = wte, 
                group = 1,
                text = paste0("Date: ", date, "\n",
                              "Water Table Elevation (m): ", wte))) + 
  theme_classic() + 
  labs(x = "Time", 
       y = "Water Table Elevation (m above sea level)",
       title = paste0("S4 Daily Peatland Water Table Elevation (", min_year, "-", max_year, ")") 
       ) + 
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5, 
                                     size = 7))

#interactive plot 
ggplotly(p_bogwell_s4,
         tooltip = "text")

S5 Manual Stripchart

bogwell_clean_s5 <- bogwell_clean %>% 
    subset(peatland == "S5")

#plot
p_bogwell_s5 <- ggplot(data = bogwell_clean_s5) + 
  geom_line(aes(x = date, 
                y = wte, 
                group = 1,
                text = paste0("Date: ", date, "\n",
                              "Water Table Elevation (m): ", wte))) + 
  theme_classic() + 
  labs(x = "Time", 
       y = "Water Table Elevation (m above sea level)",
       title = paste0("S5 Daily Peatland Water Table Elevation (", min_year, "-", max_year, ")") 
       ) + 
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5, 
                                     size = 7))

#interactive plot 
ggplotly(p_bogwell_s5,
         tooltip = "text")

S6 Manual Stripchart

bogwell_clean_s6 <- bogwell_clean %>% 
    subset(peatland == "S6")

#plot
p_bogwell_s6 <- ggplot(data = bogwell_clean_s6) + 
  geom_line(aes(x = date, 
                y = wte, 
                group = 1,
                text = paste0("Date: ", date, "\n",
                              "Water Table Elevation (m): ", wte))) + 
  theme_classic() + 
  labs(x = "Time", 
       y = "Water Table Elevation (m above sea level)",
       title = paste0("S6 Daily Peatland Water Table Elevation (", min_year, "-", max_year, ")") 
       ) + 
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5, 
                                     size = 7))

#interactive plot 
ggplotly(p_bogwell_s6,
         tooltip = "text")

Bog Lake Fen (BOGLK) Manual Stripchart

bogwell_clean_boglk <- bogwell_clean %>% 
    subset(peatland == "BOGLK")

#plot
p_bogwell_boglk <- ggplot(data = bogwell_clean_boglk) + 
  geom_line(aes(x = date, 
                y = wte, 
                group = 1,
                text = paste0("Date: ", date, "\n",
                              "Water Table Elevation (m): ", wte))) + 
  theme_classic() + 
  labs(x = "Time", 
       y = "Water Table Elevation (m above sea level)",
       title = paste0("BOGLK Daily Peatland Water Table Elevation (", min_year, "-", max_year, ")") 
       ) + 
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5, 
                                     size = 7))

#interactive plot 
ggplotly(p_bogwell_boglk,
         tooltip = "text")

Shaft Encoders

Note that shaft encoder bogwell data comes from 2 main sources:

  1. The RealTimeData Box directory
  2. The annual_appended_logger_files Box directory
  • Create a function for reading in Campbell DataLogger .dat files:
readCDL = function(file){
  
  # read data file starting on 5th line
  dat <- read.csv(file, sep=",",header=FALSE,skip=4,na.strings="NAN",stringsAsFactors=F)
  
  # Read in just the header line (l2)
  # unlist the line, and remove quotes 
  h <- readLines(file, n=2)[2]
  n <- as.factor(unlist(strsplit(h, ",")) )
  n2 <- gsub('"', "", n)
  
  # assign column names to dataframe
  colnames(dat) = n2
  
  return(dat)
}
  • Create constants to convert between feet and meters:
MetersPerFoot = 0.3048
FeetPerMeter = 3.28048

S1 Shaft Encoder

  • Read in the data from the following Box file path: External-MEF_DATA/DataDump/RealTimeData

  • Clean the data into a tidy format

#set file path to read in data from Box 
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/RealTimeData"

#read in the raw data 
##note: this data is only 2022 
s1_se <- readCDL(file = here(filepath, "S1-EM3_Table1.dat"))

#clean the data 
s1_se_clean <- s1_se %>% 
  clean_names() %>% 
  #select relevant columns
  select(timestamp,
         wt_elev) %>% 
  #rename columns 
  rename(datetime = timestamp,
         wt_elev_ft = wt_elev) %>% 
  #reformat columns into appropriate classes 
  mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S', 
                                  tz = "GMT"),
    wt_elev_ft = as.numeric(wt_elev_ft),
    #calculate WTE in meters
    wt_elev_m = wt_elev_ft * MetersPerFoot)
  • Read in 2019 data from the following Box directory path: External-MEF_DATA/DataDump/annual_appended_logger_files/2019
#set file path to read in data from Box 
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/annual_appended_logger_files"

#set the year
year = "2019"

#set the file name 
file = "S1-EM3_Table1.dat"

#read in 2019 data 
s1_2019 <- readCDL(file = here(filepath, year, file))

#clean 2019 data 
s1_2019_clean <- s1_2019 %>% 
  clean_names() %>% 
  #select relevant columns
  select(timestamp,
         wt_elev) %>% 
  #rename columns 
  rename(datetime = timestamp,
         wt_elev_ft = wt_elev) %>% 
  #reformat columns into appropriate classes 
  mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S', 
                                  tz = "GMT"),
    wt_elev_ft = as.numeric(wt_elev_ft),
    #calculate WTE in meters
    wt_elev_m = wt_elev_ft * MetersPerFoot)
  • Read in 2020 data from the following Box directory path: External-MEF_DATA/DataDump/annual_appended_logger_files/2020
#set file path to read in data from Box 
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/annual_appended_logger_files"

#set the year
year = "2020"

#set the file name 
file = "S1-EM3_Table1.dat"

#read in 2020 data
#note: this data is 2020 - 2022 
s1_2020 <- readCDL(file = here(filepath, year, file))

#clean the 2020 data 
s1_2020_clean <- s1_2020 %>% 
  clean_names() %>% 
  #select relevant columns
  select(timestamp,
         wt_elev) %>% 
  #rename columns 
  rename(datetime = timestamp,
         wt_elev_ft = wt_elev) %>% 
  #reformat columns into appropriate classes 
  mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S', 
                                  tz = "GMT"),
    wt_elev_ft = as.numeric(wt_elev_ft),
    #calculate WTE in m 
    wt_elev_m = wt_elev_ft * MetersPerFoot)
  • Combine all S1 data from both the RealTimeData folder and annual_appended_logger_files folder in Box
  • Note that this is all WTE observations, not daily max WTE
  • Plot
s1_all <- rbind(s1_se_clean, s1_2019_clean, s1_2020_clean) %>% 
  #remove NA values
  filter(!is.na(wt_elev_m)) %>% 
  #extract year 
  mutate(year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                             tz = "GMT"),
                  format = '%Y'),
    year = as.numeric(year)) %>% 
  #remove duplicate entries 
  unique()

#set min and max year 
min_year = min(s1_all$year)
max_year = max(s1_all$year)

#plot
p_s1 <- ggplot(data = s1_all) + 
  geom_line(aes(x = datetime,
                y = wt_elev_m,
                group = 1,
                text = paste0("Date: ", datetime, "\n",
                              "Water Table Elevation (m): ", round(wt_elev_m, digits = 3)))) + 
  theme_classic() + 
  labs(x = "Time", 
       y = "Water Table Elevation (m above sea level)",
       title = paste0("S1 Shaft Encoder Water Table Elevation (", min_year, "-", max_year, ")") 
       ) + 
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5, 
                                     size = 7))

#interactive plot 
ggplotly(p_s1,
         tooltip = "text")

S2 Shaft Encoder

  • Read in the 2022 data from the following Box file path: External-MEF_DATA/DataDump/RealTimeData
    • We are interested in the MaxWTElev column
  • Clean the data into a tidy format
  • Note that there is one WTE observation per day
#set file path to read in data from Box 
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/RealTimeData"

#read in the raw data 
s2_se <- readCDL(file = here(filepath, "S2_bogwell_S2BW_Daily.dat"))

#clean the data
s2_se_clean <- s2_se %>% 
  clean_names() %>% 
  #select relevant columns
  select(timestamp,
         max_wt_elev) %>% 
  #rename columns 
  rename(datetime = timestamp,
         max_wt_elev_ft = max_wt_elev) %>% 
  #reformat columns into appropriate classes 
  mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S', 
                                  tz = "GMT"),
    max_wt_elev_ft = as.numeric(max_wt_elev_ft),
    #calculate WTE in m 
    max_wt_elev_m = max_wt_elev_ft * MetersPerFoot)
  • Read in 2019 data from the following Box directory path: External-MEF_DATA/DataDump/annual_appended_logger_files/2019
#set file path to read in data from Box 
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/annual_appended_logger_files"

#set the year
year = "2019"

#set the file name 
file = "S2bog_S2BW_Daily.dat"

#read in 2019 data 
s2_2019 <- readCDL(file = here(filepath, year, file))

#clean 2019 data 
s2_2019_clean <- s2_2019 %>% 
  clean_names() %>% 
  #select relevant columns
  select(timestamp,
         max_wt_elev,
         max_wt_elev_m) %>% 
  #rename columns 
  rename(datetime = timestamp,
         max_wt_elev_ft = max_wt_elev) %>% 
  #reformat columns into appropriate classes 
  mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S', 
                                  tz = "GMT"),
    max_wt_elev_ft = as.numeric(max_wt_elev_ft),
    #calculate WTE in meters
    max_wt_elev_m = as.numeric(max_wt_elev_m))
  • Read in 2020 data from the following Box directory path: External-MEF_DATA/DataDump/annual_appended_logger_files/2020
#set file path to read in data from Box 
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/annual_appended_logger_files"

#set the year
year = "2020"

#set the file name 
file = "S2bog_S2BW_Daily.dat"

#read in 2020 data 
s2_2020 <- readCDL(file = here(filepath, year, file))

#clean 2020 data 
s2_2020_clean <- s2_2020 %>% 
  clean_names() %>% 
  #select relevant columns
  select(timestamp,
         max_wt_elev,
         max_wt_elev_m) %>% 
  #rename columns 
  rename(datetime = timestamp,
         max_wt_elev_ft = max_wt_elev) %>% 
  #reformat columns into appropriate classes 
  mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S', 
                                  tz = "GMT"),
    max_wt_elev_ft = as.numeric(max_wt_elev_ft),
    #calculate WTE in meters
    max_wt_elev_m = as.numeric(max_wt_elev_m))
  • Combine all S2 data from both the RealTimeData folder and annual_appended_logger_files folder in Box
  • Note there is one observation per day
  • Plot
s2_all <- rbind(s2_se_clean, s2_2019_clean, s2_2020_clean) %>% 
  #remove NA values
  filter(!is.na(max_wt_elev_m)) %>% 
  #remove rows where WTE = 0 
  subset(max_wt_elev_m > 0) %>% 
  #extract year 
  mutate(year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                             tz = "GMT"),
                  format = '%Y'),
    year = as.numeric(year)) %>% 
  #remove duplicate entries 
  unique()

#set min and max year 
min_year = min(s2_all$year)
max_year = max(s2_all$year)

#plot
p_s2 <- ggplot(data = s2_all) + 
  geom_line(aes(x = datetime,
                y = max_wt_elev_m,
                group = 1,
                text = paste0("Date: ", datetime, "\n",
                              "Water Table Elevation (m): ", round(max_wt_elev_m, digits = 3)))) + 
  theme_classic() + 
  labs(x = "Time", 
       y = "Water Table Elevation (m above sea level)",
       title = paste0("S2 Shaft Encoder Water Table Elevation (", min_year, "-", max_year, ")") 
       ) + 
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5, 
                                     size = 7))

#interactive plot 
ggplotly(p_s2,
         tooltip = "text")

S3 Shaft Encoder

S3 was omitted due to unreliable data.

S4 Shaft Encoder

  • Read in 2020 data from the following Box directory path: External-MEF_DATA/DataDump/annual_appended_logger_files/2020
  • Note there is one observation per day
#set file path to read in data from Box 
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/annual_appended_logger_files"

year = "2020"

file = "S4_bogwell_S4BW_Daily.dat"

#read in 2020 data 
s4_2020 <- readCDL(file = here(filepath, year, file))

#clean the data 
s4_2020_clean <- s4_2020 %>% 
  clean_names() %>% 
  #select relevant columns
  select(timestamp,
         max_wt_elev,
         max_wt_elev_m) %>% 
  #rename columns 
  rename(datetime = timestamp,
         max_wt_elev_ft = max_wt_elev) %>% 
  #reformat columns into appropriate classes 
  mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S', 
                                  tz = "GMT"),
    max_wt_elev_ft = as.numeric(max_wt_elev_ft),
    max_wt_elev_m = as.numeric(max_wt_elev_m), 
    date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y-%m-%d'),
         date = as.POSIXct(date, tz = "GMT"),
         year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y'),
         year = as.numeric(year))

#remove unreliable values 
s4_2020_clean$max_wt_elev_m[which(s4_2020_clean$max_wt_elev_m > 428.75)] <- NA

#set min and max year 
min_year = min(s4_2020_clean$year)
max_year = max(s4_2020_clean$year)
  • Plot
#plot 
p_s4 <- ggplot(data = s4_2020_clean) + 
  geom_line(aes(x = datetime, 
                y = max_wt_elev_m,
                group = 1,
                text = paste0("Date: ", date, "\n",
                              "Water Table Elevation (m): ", max_wt_elev_m))) + 
  theme_classic() + 
  labs(x = "Time", 
       y = "Water Table Elevation (m)", 
       title = paste0("S4 Bogwell Water Table Elevation (", min_year, "-", max_year, ")")
       ) + 
  theme(plot.title = element_text(hjust = 0.5),
          plot.subtitle = element_text(hjust = 0.5))

#interactive plot
ggplotly(p_s4,
         tooltip = "text")

S5 Shaft Encoder

  • Read in 2020 data from the following Box directory path: External-MEF_DATA/DataDump/annual_appended_logger_files/2020
#set file path to read in data from Box 
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/annual_appended_logger_files"

year = "2020"

file = "S5_bogwell_S5BW_Daily.dat"

#read in 2020-2022 data 
s5_2020 <- readCDL(file = here(filepath, year, file))

#clean the data 
s5_2020_clean <- s5_2020 %>% 
  clean_names() %>% 
  #select relevant columns
  select(timestamp,
         max_wt_elev,
         max_wt_elev_m) %>% 
  #rename columns 
  rename(datetime = timestamp,
         max_wt_elev_ft = max_wt_elev) %>% 
  #reformat columns into appropriate classes 
  mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S', 
                                  tz = "GMT"),
    max_wt_elev_ft = as.numeric(max_wt_elev_ft),
    max_wt_elev_m = as.numeric(max_wt_elev_m), 
    date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y-%m-%d'),
         date = as.POSIXct(date, tz = "GMT"),
         year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y'),
         year = as.numeric(year))
  • Read in the 2022 data from the following Box file path: External-MEF_DATA/DataDump/RealTimeData

  • Clean the 2022 data into a tidy format

#set file path to read in data from Box 
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/RealTimeData"

#read in the raw data 
##note: this data is 2021 - 2022 
s5_se <- readCDL(file = here(filepath, "S5_bogwell_S5BW_Daily.dat"))

#clean the data 
s5_se_clean <- s5_se %>% 
  clean_names() %>% 
  #select relevant columns
  select(timestamp,
         max_wt_elev,
         max_wt_elev_m) %>% 
  #rename columns 
  rename(datetime = timestamp,
         max_wt_elev_ft = max_wt_elev) %>% 
  #reformat columns into appropriate classes 
  mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S', 
                                  tz = "GMT"),
    max_wt_elev_ft = as.numeric(max_wt_elev_ft),
    max_wt_elev_m = as.numeric(max_wt_elev_m), 
    date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y-%m-%d'),
         date = as.POSIXct(date, tz = "GMT"),
         year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y'),
         year = as.numeric(year))
  • Combine all S5 data from both the RealTimeData folder and annual_appnded_logger_files folder in Box
  • Note there is one observation per day
s5_all <- rbind(s5_2020_clean, s5_se_clean) %>% 
  #remove NA values
  filter(!is.na(max_wt_elev_m)) %>% 
  #remove duplicate observations 
  distinct()

#remove unreliable values 
s5_all$max_wt_elev_m[which(s5_all$max_wt_elev_m > 424)] <- NA
s5_all$max_wt_elev_m[which(s5_all$date == as.POSIXct("2022-01-31", tz = "GMT"))] <- NA
s5_all$max_wt_elev_m[which(s5_all$date == as.POSIXct("2022-03-02", tz = "GMT"))] <- NA
s5_all$max_wt_elev_m[which(s5_all$date == as.POSIXct("2022-03-06", tz = "GMT"))] <- NA
s5_all$max_wt_elev_m[which(s5_all$date == as.POSIXct("2022-03-15", tz = "GMT"))] <- NA

#set min and max year 
min_year = min(s5_se_clean$year)
max_year = max(s5_se_clean$year)

#plot 
p_s5 <- ggplot(data = s5_all) + 
  geom_line(aes(x = datetime, 
                y = max_wt_elev_m,
                group = 1,
                text = paste0("Date: ", date, "\n",
                              "Water Table Elevation (m): ", max_wt_elev_m))) + 
  theme_classic() + 
  labs(x = "Time", 
       y = "Water Table Elevation (m)", 
       title = paste0("S5 Bogwell Water Table Elevation (", min_year, "-", max_year, ")")
       ) + 
  theme(plot.title = element_text(hjust = 0.5),
          plot.subtitle = element_text(hjust = 0.5))

#interactive plot
ggplotly(p_s5,
         tooltip = "text")

S6 Shaft Encoder

  • Read in the 2022 data from the following Box file path: External-MEF_DATA/DataDump/RealTimeData

  • Clean the 2022 data into a tidy format

#set file path to read in data from Box 
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/RealTimeData"

#read in the raw data 
##note: this data is 2021 - 2022 
s6_se <- readCDL(file = here(filepath, "S6_bogwell_S6BW_Daily.dat"))

#clean the data 
s6_se_clean <- s6_se %>% 
  clean_names() %>% 
  #select relevant columns
  select(timestamp,
         max_wt_elev,
         max_wt_elev_m) %>% 
  #rename columns 
  rename(datetime = timestamp,
         max_wt_elev_ft = max_wt_elev) %>% 
  #reformat columns into appropriate classes 
  mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S', 
                                  tz = "GMT"),
    max_wt_elev_ft = as.numeric(max_wt_elev_ft),
    max_wt_elev_m = as.numeric(max_wt_elev_m), 
    date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y-%m-%d'),
         date = as.POSIXct(date, tz = "GMT"),
         year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y'),
         year = as.numeric(year))
  • Read in 2019 data from the following Box directory path: External-MEF_DATA/DataDump/annual_appended_logger_files/2019
#set file path to read in data from Box 
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/annual_appended_logger_files"

year = "2019"

file = "S6_bogwell_S6BW_Daily.dat"

#read in 2019 data 
s6_2019 <- readCDL(file = here(filepath, year, file))

#clean 2019 data 
s6_2019_clean <- s6_2019 %>% 
  clean_names() %>% 
  #select relevant columns
  select(timestamp,
         max_wt_elev,
         max_wt_elev_m) %>% 
  #rename columns 
  rename(datetime = timestamp,
         max_wt_elev_ft = max_wt_elev) %>% 
  #reformat columns into appropriate classes 
  mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S', 
                                  tz = "GMT"),
         date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y-%m-%d'),
         date = as.POSIXct(date, tz = "GMT"),
         year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y'),
         year = as.numeric(year),
    max_wt_elev_ft = as.numeric(max_wt_elev_ft),
    max_wt_elev_m = as.numeric(max_wt_elev_m))
  • Read in 2020 data from the following Box directory path: External-MEF_DATA/DataDump/annual_appended_logger_files/2020
#set file path to read in data from Box 
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/annual_appended_logger_files"

year = "2020"

file = "S6_bogwell_S6BW_Daily.dat"

#read in 2020-2022 data 
s6_2020 <- readCDL(file = here(filepath, year, file))

#clean 2020-2022 data 
s6_2020_clean <- s6_2020 %>% 
  clean_names() %>% 
  #select relevant columns
  select(timestamp,
         max_wt_elev,
         max_wt_elev_m) %>% 
  #rename columns 
  rename(datetime = timestamp,
         max_wt_elev_ft = max_wt_elev) %>% 
  #reformat columns into appropriate classes 
  mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S', 
                                  tz = "GMT"),
         date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y-%m-%d'),
         date = as.POSIXct(date, tz = "GMT"),
         year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y'),
         year = as.numeric(year),
    max_wt_elev_ft = as.numeric(max_wt_elev_ft),
    max_wt_elev_m = as.numeric(max_wt_elev_m))
  • Combine all S6 data from both the RealTimeData folder and annual_appnded_logger_files folder in Box
  • Note there is one observation per day
s6_all <- rbind(s6_se_clean, s6_2019_clean, s6_2020_clean) %>% 
  #remove one row where WTE = 0 
  subset(max_wt_elev_m > 0) %>% 
  #remove NA values
  filter(!is.na(max_wt_elev_m))

#set min and max year 
min_year = min(s6_all$year)
max_year = max(s6_all$year)

#plot
p_s6 <- ggplot(data = s6_all) + 
  geom_line(aes(x = datetime,
                y = max_wt_elev_m,
                group = 1,
                text = paste0("Date: ", date, "\n",
                              "Max Water Table Elevation (m): ", round(max_wt_elev_m, digits = 3)))) + 
  theme_classic() + 
  labs(x = "Time", 
       y = "Water Table Elevation (m above sea level)",
       title = paste0("S6 Shaft Encoder Water Table Elevation (", min_year, "-", max_year, ")") 
       ) + 
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5, 
                                     size = 7))

#interactive plot 
ggplotly(p_s6,
         tooltip = "text")

Bog Lake Fen (BOGLK) Shaft Encoder

  • Read in the 2020-2022 data from the following Box file path: External-MEF_DATA/DataDump/RealTimeData

  • Clean the 2020-2022 data into a tidy format

#set file path to read in data from Box 
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/RealTimeData"

#read in the raw shaft encoder data 
boglk_se <- readCDL(file = here(filepath, "BLF_met_BogLakeW.dat"))

#clean the data 
boglk_se_clean <- boglk_se %>% 
  clean_names() %>% 
  #select relevant columns
  select(timestamp,
         wt_elev) %>% 
  #rename columns 
  rename(datetime = timestamp,
         max_wt_elev_ft = wt_elev) %>% 
  #reformat columns into appropriate classes 
  mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S', 
                                  tz = "GMT"),
    max_wt_elev_ft = as.numeric(max_wt_elev_ft),
    max_wt_elev_m = max_wt_elev_ft * MetersPerFoot,
    max_wt_elev_m = as.numeric(max_wt_elev_m), 
    date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y-%m-%d'),
         date = as.POSIXct(date, tz = "GMT"),
         year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y'),
         year = as.numeric(year))
  • Read in 2019 data from the following Box directory path: External-MEF_DATA/DataDump/annual_appende_logger_files/2019
#set file path to read in data from Box 
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/annual_appended_logger_files"

year = "2019"

file = "BLF_met_BogLakeW.dat"

#read in 2019 data 
boglk_2019 <- readCDL(file = here(filepath, year, file))

#clean 2019 data 
boglk_2019_clean <- boglk_2019 %>% 
  clean_names() %>% 
  #select relevant columns
  select(timestamp,
         wt_elev) %>% 
  #rename columns 
  rename(datetime = timestamp,
         max_wt_elev_ft = wt_elev) %>% 
  #reformat columns into appropriate classes 
  mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S', 
                                  tz = "GMT"),
         date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y-%m-%d'),
         date = as.POSIXct(date, tz = "GMT"),
         year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y'),
         year = as.numeric(year),
    max_wt_elev_ft = as.numeric(max_wt_elev_ft),
    max_wt_elev_m = max_wt_elev_ft * MetersPerFoot,
    max_wt_elev_m = as.numeric(max_wt_elev_m))
  • Read in 2020 data from the following Box directory path: External-MEF_DATA/DataDump/annual_appende_logger_files/2020
#set file path to read in data from Box 
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/annual_appended_logger_files"

year = "2020"

file = "BLF_met_BogLakeW.dat"

#read in 2020 data 
boglk_2020 <- readCDL(file = here(filepath, year, file))

#clean 2020 data 
boglk_2020_clean <- boglk_2020 %>% 
  clean_names() %>% 
  #select relevant columns
  select(timestamp,
         wt_elev) %>% 
  #rename columns 
  rename(datetime = timestamp,
         max_wt_elev_ft = wt_elev) %>% 
  #reformat columns into appropriate classes 
  mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S', 
                                  tz = "GMT"),
         date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y-%m-%d'),
         date = as.POSIXct(date, tz = "GMT"),
         year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y'),
         year = as.numeric(year),
    max_wt_elev_ft = as.numeric(max_wt_elev_ft),
    max_wt_elev_m = max_wt_elev_ft * MetersPerFoot,
    max_wt_elev_m = as.numeric(max_wt_elev_m))
  • Combine all BOGLK data from both the RealTimeData folder and annual_appnded_logger_files folder in Box
  • Note there are multiple observations per day
boglk_all <- rbind(boglk_se_clean, boglk_2019_clean, boglk_2020_clean) %>% 
  #remove NA values
  filter(!is.na(max_wt_elev_m)) %>% 
  #remove duplicate observations 
  unique()

#set min and max year 
min_year = min(boglk_all$year)
max_year = max(boglk_all$year)

#plot
p_boglk <- ggplot(data = boglk_all) + 
  geom_line(aes(x = datetime,
                y = max_wt_elev_m,
                group = 1,
                text = paste0("Date: ", date, "\n",
                              "Max Water Table Elevation (m): ", round(max_wt_elev_m, digits = 3)))) + 
  theme_classic() + 
  labs(x = "Time", 
       y = "Water Table Elevation (m above sea level)",
       title = paste0("Bog Lake Fen (BOGLK) Shaft Encoder \n Water Table Elevation (", min_year, "-", max_year, ")") 
       ) + 
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5, 
                                     size = 7))

#interactive plot 
ggplotly(p_boglk,
         tooltip = "text")

1:1 Plots

Next, we want to compare manual stripchart streamflow values and shaft encoder streamflow values at each site during the same time period in order to assess the similarities between the two types of data collection. Ultimately, we hope the shaft encoders are collecting very similar readings to the stripcharts in order to facilitate the transition from the old stripchart data collection technique to the new shaft encoder data collection technique.

To make these comparisons, we use 1:1 scatterplots with manual stripchart data plotted on the X axis and shaft encoder data plotted on the Y axis. We will also plot a y = x line on the plot to help visually assess if the shaft encoder values mimic the stripchart values. If they do, the points will fall along the y = x line.

Lastly, at each site, we will also calculate the differences between stripchart and shaft encoder stramflow values as well as the standard deviation of the differences.

S1 1:1 Plot

  • Format the data

    • Note this is daily max WTE
#format stripchart data
bog1 <- bogwell_clean_s1 %>% 
  #remove unnecessary columns 
  select(-peatland, -flag) %>% 
  #create a column to note what type of data this is 
  mutate(type = "stripchart")

#format shaft encoder data 
se1 <- s1_all %>% 
  #extract date and year 
  mutate(date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y-%m-%d'),
         date = as.POSIXct(date, tz = "GMT"),
         year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y'),
         year = as.numeric(year),
         #note data type
         type = "shaft_encoder") %>% 
  #rename columns to join the data  
  rename(wte = wt_elev_m) %>% 
  group_by(date) %>% 
  #calculate max WTE per day 
  mutate(max_wte = max(wte)) %>% 
  #remove unnecessary columns 
  select(-datetime, -wt_elev_ft, -wte) %>% 
  distinct() %>% 
  #rename columns to join the data
  rename(wte = max_wte)

#join the data 
s1_join <- full_join(x = bog1, 
                     y = se1,
                     by = c("wte", "date", "year", "type"))

#subset the join to the time period where the two data sources overlap 
s1_join_sub <- s1_join %>% 
  pivot_wider(names_from = "type",
              values_from = "wte") %>% 
  #remove rows with NA values 
  filter(!is.na(shaft_encoder), 
         !is.na(stripchart))

#set min and max year 
min_year = min(s1_join_sub$year)
max_year = max(s1_join_sub$year)
  • Plot
#calculate the number of samples
s1_n <- nrow(s1_join_sub)

p1 <- ggplot(data = s1_join_sub) + 
  geom_point(aes(x = stripchart, 
                 y = shaft_encoder,
                 alpha = 0.1,
                 group = 1,
                 text = paste0("Date: ", date, "\n",
                               "Stripchart: ", stripchart, "\n",
                              "Shaft Encoder: ", round(shaft_encoder, digits = 3)))) + 
  #plot straight line to compare point spread 
  geom_abline(intercept = 0,
              slope = 1) + 
  theme_classic() + 
  labs(x = "Manual Stripchart", 
       y = "Shaft Encoder"
       ) + 
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5, 
                                     size = 7),
        legend.position = "none")

#interactive plot 
ggplotly(p = p1, 
         #set hovering text 
         tooltip = "text") %>% 
  layout(
    title = list(text = paste0(paste0("S1 Daily Max WTE Comparison (", min_year, "-", max_year, ")"),
                                    '<br>',
                                    '<sup>',
                                     #subtitle
                                     paste0("n = ", s1_n), 
                                    '</sup>')), 
         #title size 
         font = list(size = 12))

S1 Statistics

  • Calculate the difference between S1 manual stripchart streamflow values and S1 streamflow shaft encoder values
    • Identify the minimum, 1st quartile, median, mean, 3rd quartile, and maximum difference values using summary()
  • Calculate the standard deviation of the differences
s1_diff <- s1_join_sub %>% 
  mutate(diff = shaft_encoder - stripchart) 

summary(s1_diff$diff)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.17488 -0.01869  0.01519  0.09393  0.04801  1.54165
s1_sd <- round(sd(s1_diff$diff), digits = 5)

The standard deviation of the differences between stripchart and shaft encoder values is approximately 0.35164.

  • Plot the differences between shaft encoder and stripchart streamflow values
#set min and max year 
min_year = min(s1_diff$year)
max_year = max(s1_diff$year)

#calculate the number of samples
s1_n <- nrow(s1_diff)

#plot 
ggplot(data = s1_diff) + 
  geom_histogram(aes(x = diff),
                 binwidth = 0.002) + 
  theme_classic() + 
  labs(x = "Differences Between Shaft Encoders - Stripcharts (m)", 
       y = "Frequency", 
       title = paste0("S1 Daily Max WTE Differences (", min_year, "-", max_year, ")"),
       caption = paste0("n = ", s1_n)
       ) + 
  theme(plot.title = element_text(hjust = 0.5),
          plot.subtitle = element_text(hjust = 0.5))

S2 1:1 Plot

  • Format the data
#format stripchart data
bog2 <- bogwell_clean_s2 %>% 
  #remove unnecessary columns 
  select(-peatland, -flag) %>% 
  #create a column to note what type of data this is 
  mutate(type = "stripchart")

#format shaft encoder data 
se2 <- s2_se_clean %>% 
  #extract date and year 
  mutate(date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y-%m-%d'),
         date = as.POSIXct(date, tz = "GMT"),
         year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y'),
         year = as.numeric(year),
         #note data type
         type = "shaft_encoder") %>% 
  #remove unnecessary columns 
  select(-datetime, -max_wt_elev_ft) %>% 
  #rename columns to join the data  
  rename(wte = max_wt_elev_m)

#join the data 
s2_join <- full_join(x = bog2, 
                     y = se2,
                     by = c("wte", "date", "year", "type"))

#subset the join to the time period where the two data sources overlap 
s2_join_sub <- s2_join %>% 
  pivot_wider(names_from = "type",
              values_from = "wte") %>% 
  #remove rows with NA values 
  filter(!is.na(shaft_encoder), 
         !is.na(stripchart))

#set min and max year 
min_year = min(s2_join_sub$year)
max_year = max(s2_join_sub$year)
  • Plot
#calculate the number of samples
s2_n <- nrow(s2_join_sub)

p2 <- ggplot(data = s2_join_sub) + 
  geom_point(aes(x = stripchart, 
                 y = shaft_encoder,
                 alpha = 0.1,
                 group = 1,
                 text = paste0("Date: ", date, "\n", 
                               "Stripchart: ", stripchart, "\n",
                              "Shaft Encoder: ", round(shaft_encoder, digits = 3)))) + 
  #plot straight line to compare point spread 
  geom_abline(intercept = 0,
              slope = 1) + 
  theme_classic() + 
  labs(x = "Manual Stripchart", 
       y = "Shaft Encoder"
       ) + 
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5, 
                                     size = 7),
        legend.position = "none")

#interactive plot 
ggplotly(p = p2, 
         tooltip = "text") %>% 
  layout(title = list(text = paste0(paste0("S2 Daily Max WTE Comparison (", min_year, "-", max_year, ")"),
                                    '<br>',
                                    '<sup>',
                                     paste0("n = ", s2_n), 
                                    '</sup>')), 
         font = list(size = 12))

S2 Statistics

  • Calculate the difference between S2 manual stripchart streamflow values and S2 streamflow shaft encoder values
    • Identify the minimum, 1st quartile, median, mean, 3rd quartile, and maximum difference values using summary()
  • Calculate the standard deviation of the differences
s2_diff <- s2_join_sub %>% 
  mutate(diff = shaft_encoder - stripchart) 

summary(s2_diff$diff)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -0.018694 -0.002111  0.001167  0.001101  0.004255  0.046007
s2_sd <- round(sd(s2_diff$diff), digits = 5)

The standard deviation of the differences between stripchart and shaft encoder values is approximately 0.00526.

  • Plot the differences between shaft encoder and stripchart streamflow values
#set min and max year 
min_year = min(s2_diff$year)
max_year = max(s2_diff$year)

#calculate the number of samples
s2_n <- nrow(s2_diff)

#plot 
ggplot(data = s2_diff) + 
  geom_histogram(aes(x = diff),
                 binwidth = 0.002) + 
  theme_classic() + 
  labs(x = "Differences Between Shaft Encoders - Stripcharts (m)", 
       y = "Frequency", 
       title = paste0("S2 Daily Max WTE Differences (", min_year, "-", max_year, ")"), 
       caption = paste0("n = ", s2_n)
       ) + 
  theme(plot.title = element_text(hjust = 0.5),
          plot.subtitle = element_text(hjust = 0.5))

S3 1:1 Plot

The S3 site was omitted due to unreliable data.

S4 1:1 Plot

  • Format the data
#format stripchart data
bog4 <- bogwell_clean_s4 %>% 
  #remove unnecessary columns 
  select(-peatland, -flag) %>% 
  #create a column to note what type of data this is 
  mutate(type = "stripchart")

#format shaft encoder data 
se4 <- s4_2020_clean %>% 
  #extract date and year 
  mutate(date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y-%m-%d'),
         date = as.POSIXct(date, tz = "GMT"),
         year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y'),
         year = as.numeric(year),
         #note data type
         type = "shaft_encoder") %>% 
  #remove unnecessary columns 
  select(-datetime, -max_wt_elev_ft) %>% 
  #rename columns to join the data  
  rename(wte = max_wt_elev_m)

#join the data 
s4_join <- full_join(x = bog4, 
                     y = se4,
                     by = c("wte", "date", "year", "type"))

#subset the join to the time period where the two data sources overlap 
s4_join_sub <- s4_join %>% 
  pivot_wider(names_from = "type",
              values_from = "wte") %>% 
  #remove rows with NA values 
  filter(!is.na(shaft_encoder), 
         !is.na(stripchart))

#set min and max year 
min_year = min(s4_join_sub$year)
max_year = max(s4_join_sub$year)
  • Plot
#calculate the number of samples
s4_n <- nrow(s4_join_sub)

p4 <- ggplot(data = s4_join_sub) + 
  geom_point(aes(x = stripchart, 
                 y = shaft_encoder,
                 alpha = 0.1,
                 group = 1,
                 text = paste0("Date: ", date, "\n", 
                               "Stripchart: ", stripchart, "\n",
                              "Shaft Encoder: ", round(shaft_encoder, digits = 3)))) + 
  #plot straight line to compare point spread 
  geom_abline(intercept = 0,
              slope = 1) + 
  theme_classic() + 
  labs(x = "Manual Stripchart", 
       y = "Shaft Encoder"
       ) + 
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5, 
                                     size = 7),
        legend.position = "none")

#interactive plot 
ggplotly(p = p4, 
         tooltip = "text") %>% 
  layout(title = list(text = paste0(paste0("S4 Daily Max WTE Comparison (", min_year, "-", max_year, ")"),
                                    '<br>',
                                    '<sup>',
                                     paste0("n = ", s4_n), 
                                    '</sup>')),
         font = list(size = 12)
         )

S4 Statistics

  • Calculate the difference between S4 manual stripchart streamflow values and S4 streamflow shaft encoder values
    • Identify the minimum, 1st quartile, median, mean, 3rd quartile, and maximum difference values using summary()
  • Calculate the standard deviation of the differences
s4_diff <- s4_join_sub %>% 
  mutate(diff = shaft_encoder - stripchart) 

summary(s4_diff$diff)
##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
## -0.0200000  0.0000000  0.0000000 -0.0002602  0.0000000  0.0200000
s4_sd <- round(sd(s4_diff$diff), digits = 5)

The standard deviation of the differences between stripchart and shaft encoder values is approximately 0.00607.

  • Plot the differences between shaft encoder and stripchart streamflow values
#set min and max year 
min_year = min(s4_diff$year)
max_year = max(s4_diff$year)

#calculate the number of samples
s4_n <- nrow(s4_diff)

#plot 
ggplot(data = s4_diff) + 
  geom_histogram(aes(x = diff),
                 binwidth = 0.01) + 
  theme_classic() + 
  labs(x = "Differences Between Shaft Encoders - Stripcharts (m)", 
       y = "Frequency", 
       title = paste0("S4 Daily Max WTE Differences (", min_year, "-", max_year, ")"), 
       caption = paste0("n = ", s4_n)
       ) + 
  theme(plot.title = element_text(hjust = 0.5),
          plot.subtitle = element_text(hjust = 0.5))

S5 1:1 Plot

  • Format the data
#format stripchart data
bog5 <- bogwell_clean_s5 %>% 
  #remove unnecessary columns 
  select(-peatland, -flag) %>% 
  #create a column to note what type of data this is 
  mutate(type = "stripchart")

#format shaft encoder data 
se5 <- s5_all %>% 
  #extract date and year 
  mutate(date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y-%m-%d'),
         date = as.POSIXct(date, tz = "GMT"),
         year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y'),
         year = as.numeric(year),
         #note data type
         type = "shaft_encoder") %>% 
  #remove unnecessary columns 
  select(-datetime, -max_wt_elev_ft) %>% 
  #rename columns to join the data  
  rename(wte = max_wt_elev_m)

#join the data 
s5_join <- full_join(x = bog5, 
                     y = se5,
                     by = c("wte", "date", "year", "type"))

#subset the join to the time period where the two data sources overlap 
s5_join_sub <- s5_join %>% 
  pivot_wider(names_from = "type",
              values_from = "wte") %>% 
  #remove rows with NA values 
  filter(!is.na(shaft_encoder), 
         !is.na(stripchart))

#set min and max year 
min_year = min(s5_join_sub$year)
max_year = max(s5_join_sub$year)
  • Plot
#calculate the number of samples
s5_n <- nrow(s5_join_sub)

p5 <- ggplot(data = s5_join_sub) + 
  geom_point(aes(x = stripchart, 
                 y = shaft_encoder,
                 alpha = 0.1,
                 group = 1,
                 text = paste0("Date: ", date, "\n", 
                               "Stripchart: ", stripchart, "\n",
                              "Shaft Encoder: ", round(shaft_encoder, digits = 3)))) + 
  #plot straight line to compare point spread 
  geom_abline(intercept = 0,
              slope = 1) + 
  theme_classic() + 
  labs(x = "Manual Stripchart", 
       y = "Shaft Encoder"
       ) + 
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5, 
                                     size = 7),
        legend.position = "none")

#interactive plot 
ggplotly(p = p5, 
         tooltip = "text") %>% 
  layout(title = list(text = paste0(paste0("S5 Daily Max WTE Comparison (", min_year, "-", max_year, ")"),
                                    '<br>',
                                    '<sup>',
                                     paste0("n = ", s5_n), 
                                    '</sup>')), 
         font = list(size = 12)
         )

S5 Statistics

  • Calculate the difference between S4 manual stripchart streamflow values and S5 streamflow shaft encoder values
    • Identify the minimum, 1st quartile, median, mean, 3rd quartile, and maximum difference values using summary()
  • Calculate the standard deviation of the differences
s5_diff <- s5_join_sub %>% 
  mutate(diff = shaft_encoder - stripchart) 

summary(s5_diff$diff)
##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
## -0.0200000  0.0000000  0.0000000  0.0003529  0.0000000  0.0200000
s5_sd <- round(sd(s5_diff$diff), digits = 5)

The standard deviation of the differences between stripchart and shaft encoder values is approximately 0.00734.

  • Plot the differences between shaft encoder and stripchart streamflow values
#set min and max year 
min_year = min(s5_diff$year)
max_year = max(s5_diff$year)

#calculate the number of samples
s5_n <- nrow(s5_diff)

#plot 
ggplot(data = s5_diff) + 
  geom_histogram(aes(x = diff),
                 binwidth = 0.01) + 
  theme_classic() + 
  labs(x = "Differences Between Shaft Encoders - Stripcharts (m)", 
       y = "Frequency", 
       title = paste0("S5 Daily Max WTE Differences (", min_year, "-", max_year, ")"), 
       caption = paste0("n = ", s5_n)
       ) + 
  theme(plot.title = element_text(hjust = 0.5),
          plot.subtitle = element_text(hjust = 0.5))

S6 1:1 Plot

  • Format the data
#format stripchart data
bog6 <- bogwell_clean_s6 %>% 
  #remove unnecessary columns 
  select(-peatland, -flag) %>% 
  #create a column to note what type of data this is 
  mutate(type = "stripchart")

#format shaft encoder data 
se6 <- s6_all %>% 
  #extract date and year 
  mutate(date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y-%m-%d'),
         date = as.POSIXct(date, tz = "GMT"),
         year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y'),
         year = as.numeric(year),
         #note data type
         type = "shaft_encoder") %>% 
  #remove unnecessary columns 
  select(-datetime, -max_wt_elev_ft) %>% 
  #rename columns to join the data  
  rename(wte = max_wt_elev_m)

#join the data 
s6_join <- full_join(x = bog6, 
                     y = se6,
                     by = c("wte", "date", "year", "type"))

#subset the join to the time period where the two data sources overlap 
s6_join_sub <- s6_join %>% 
  pivot_wider(names_from = "type",
              values_from = "wte") %>% 
  #unnest the list to turn NULL values to NA values
  unnest() %>% 
  #remove rows with NA values 
  filter(!is.na(shaft_encoder), 
         !is.na(stripchart)) %>% 
  #remove duplicate rows 
  unique()

#set min and max year 
min_year = min(s6_join_sub$year)
max_year = max(s6_join_sub$year)
  • Plot
#calculate the number of samples
s6_n <- nrow(s6_join_sub)

p6 <- ggplot(data = s6_join_sub) + 
  geom_point(aes(x = stripchart, 
                 y = shaft_encoder,
                 alpha = 0.1,
                 group = 1,
                 text = paste0("Date: ", date, "\n", 
                               "Stripchart: ", stripchart, "\n",
                              "Shaft Encoder: ", round(shaft_encoder, digits = 3)))) + 
  #plot straight line to compare point spread 
  geom_abline(intercept = 0,
              slope = 1) + 
  theme_classic() + 
  labs(x = "Manual Stripchart", 
       y = "Shaft Encoder"
       ) + 
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5, 
                                     size = 7),
        legend.position = "none")

#interactive plot 
ggplotly(p = p6, 
         tooltip = "text") %>% 
  layout(title = list(text = paste0(paste0("S6 Daily Max WTE Comparison (", min_year, "-", max_year, ")"),
                                    '<br>',
                                    '<sup>',
                                     paste0("n = ", s6_n), 
                                    '</sup>')), 
         font = list(size = 12)
         )

S6 Statistics

  • Calculate the difference between S6 manual stripchart streamflow values and S6 streamflow shaft encoder values
    • Identify the minimum, 1st quartile, median, mean, 3rd quartile, and maximum difference values using summary()
  • Calculate the standard deviation of the differences
s6_diff <- s6_join_sub %>% 
  mutate(diff = shaft_encoder - stripchart) 

summary(s6_diff$diff)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -0.080000 -0.010000  0.000000 -0.001182  0.010000  0.030000
s6_sd <- round(sd(s6_diff$diff), digits = 5)

The standard deviation of the differences between stripchart and shaft encoder values is approximately 0.01252.

  • Plot the differences between shaft encoder and stripchart streamflow values
#set min and max year 
min_year = min(s6_diff$year)
max_year = max(s6_diff$year)

#calculate the number of samples
s6_n <- nrow(s6_diff)

#plot 
ggplot(data = s6_diff) + 
  geom_histogram(aes(x = diff),
                 binwidth = 0.01) + 
  theme_classic() + 
  labs(x = "Differences Between Shaft Encoders - Stripcharts (m)", 
       y = "Frequency", 
       title = paste0("S6 Daily Max WTE Differences (", min_year, "-", max_year, ")"), 
       caption = paste0("n = ", s6_n)
       ) + 
  theme(plot.title = element_text(hjust = 0.5),
          plot.subtitle = element_text(hjust = 0.5))

Bog Lake Fen (BOGLK) 1:1 Plot

  • Format the data

    • Calculate the daily WTE max
#format stripchart data
boglk <- bogwell_clean_boglk %>% 
  #remove unnecessary columns 
  select(-peatland, -flag) %>% 
  #create a column to note what type of data this is 
  mutate(type = "stripchart") %>% 
  unique()

#format shaft encoder data 
se_boglk <- boglk_all %>% 
  #extract date and year 
  mutate(date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y-%m-%d'),
         date = as.POSIXct(date, tz = "GMT"),
         year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y'),
         year = as.numeric(year),
         #note data type
         type = "shaft_encoder") %>% 
  #remove unnecessary columns 
  select(-datetime, -max_wt_elev_ft) %>% 
  #rename columns to join the data  
  rename(wte = max_wt_elev_m) %>% 
  #calculate daily max WTE 
  group_by(date) %>% 
  summarise(max_wte = max(wte),
            date = date,
            year = year,
            type = "shaft_encoder"
            ) %>% 
  #remove duplicate observations 
  unique()

#join the data 
boglk_join <- full_join(x = boglk, 
                     y = se_boglk,
                     by = c("wte" = "max_wte", "date", "year", "type"))

#subset the join to the time period where the two data sources overlap 
boglk_join_sub <- boglk_join %>% 
  pivot_wider(names_from = "type",
              values_from = "wte") %>% 
  unnest() %>% 
  #remove rows with NA values 
  filter(!is.na(shaft_encoder), 
         !is.na(stripchart)) %>% 
  unique()

#set min and max year 
min_year = min(boglk_join_sub$year)
max_year = max(boglk_join_sub$year)
  • Plot
#calculate the number of samples
boglk_n <- nrow(boglk_join_sub)

p_boglk <- ggplot(data = boglk_join_sub) + 
  geom_point(aes(x = stripchart,
                 y = shaft_encoder,
                 alpha = 0.1,
                 group = 1,
                 text = paste0("Date: ", date, "\n",
                               "Stripchart: ", stripchart, "\n",
                              "Shaft Encoder: ", round(shaft_encoder, digits = 3)))) +
  #plot straight line to compare point spread 
  geom_abline(intercept = 0,
              slope = 1) + 
  theme_classic() + 
  labs(x = "Manual Stripchart", 
       y = "Shaft Encoder"
       ) + 
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5, 
                                     size = 7),
        legend.position = "none") 


#interactive plot 
ggplotly(p = p_boglk, 
         tooltip = "text") %>% 
  layout(title = list(text = paste0(paste0("Bog Lake Fen (BOGLK) \n Daily Max WTE Comparison (", min_year, "-", max_year, ")"),
                                    '<br>',
                                    '<sup>',
                                     paste0("n = ", boglk_n), 
                                    '</sup>')), 
         font = list(size = 12)
         )

Bog Lake Fen (BOGLK) Statistics

  • Calculate the difference between BOGLK manual stripchart streamflow values and BOGLK streamflow shaft encoder values
    • Identify the minimum, 1st quartile, median, mean, 3rd quartile, and maximum difference values using summary()
  • Calculate the standard deviation of the differences
boglk_diff <- boglk_join_sub %>% 
  mutate(diff = shaft_encoder - stripchart) 

summary(boglk_diff$diff)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -0.084811 -0.035978 -0.016262 -0.019127 -0.003954  0.068180
boglk_sd <- round(sd(boglk_diff$diff), digits = 5)

The standard deviation of the differences between stripchart and shaft encoder values is approximately 0.02444.

  • Plot the differences between shaft encoder and stripchart streamflow values
#set min and max year 
min_year = min(boglk_diff$year)
max_year = max(boglk_diff$year)

#calculate the number of samples
boglk_n <- nrow(boglk_diff)

#plot 
ggplot(data = boglk_diff) + 
  geom_histogram(aes(x = diff),
                 binwidth = 0.005) + 
  theme_classic() + 
  labs(x = "Differences Between Shaft Encoders - Stripcharts (m)", 
       y = "Frequency", 
       title = paste0("Bog Lake Fen (BOGLK) \n Daily Max WTE Differences (", min_year, "-", max_year, ")"), 
       caption = paste0("n = ", boglk_n)
       ) + 
  theme(plot.title = element_text(hjust = 0.5),
          plot.subtitle = element_text(hjust = 0.5))

Correlation Matrices

  • Use ggpairs() to create correlation matrices to compare shaft encoder WTE data from each site

library(GGally)

#format the data 
s1 <- s1_join_sub %>% 
  mutate(site = "s1")
s2 <- s2_join_sub %>% 
  mutate(site = "s2")
s4 <- s4_join_sub %>% 
  mutate(site = "s4")
s5 <- s5_join_sub %>% 
  mutate(site = "s5")
s6 <- s6_join_sub %>% 
  mutate(site = "s6")
b <- boglk_join_sub %>% 
  mutate(site = "boglk")

cor_all <- rbind(s1, s2, s4, s5, s6, b) %>% 
  select(-stripchart)

cor_all_wide <- cor_all %>% 
  pivot_wider(names_from = "site", 
              values_from = "shaft_encoder")
#create a function to create linear regression line 
my_fn <- function(data, mapping, ...){
  p <- ggplot(data = data, mapping = mapping) + 
    geom_point() + 
    #geom_smooth(method=loess, fill="red", color="red", ...) +
    geom_smooth(method=lm, fill="blue", color="blue", ...)
  p
}

ggpairs(data = cor_all_wide, 
        columns = c("s1", "s2", "s4", "s5", "s6", "boglk"),
        title = "Shaft Encoder Correlations by Site", 
        xlab = "WTE (m)", 
        ylab = "WTE (m)", 
        columnLabels = c("S1", "S2", "S4", "S5", "S6", "BOGLK"),
        lower = list(continuous = my_fn), 
        ggplot2::aes(alpha = 0.01)) + 
  theme_minimal() + 
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5, 
                                     size = 7)
        )

p_pairs <- ggpairs(data = cor_all_wide, 
        columns = c("s1", "s2", "s4", "s5", "s6", "boglk"), 
        ggplot2::aes(colour = as.factor(year), alpha = 0.01)) + 
  theme_minimal() + 
  labs(title = "Shaft Encoder Correlations by Site") + 
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5, 
                                     size = 7), 
        axis.title.x = element_text(angle = 45, 
                                    vjust = 1, 
                                    color = "black")
        )

print(p_pairs)

Run this code chunk to knit to HTML without using the knit button in RStudio

  • Save as index.html to render GitHub Page

Code Accessibility

Note that this .Rmd file is being knit as index.html into the public bogwell GitHub repository in order to update the GitHub pages website, where the plots can be viewed online.